Heatmap on whole data set.

Paul is happy that the replacement is working properly on the whole dataset, we can now try to sort out viz problems with that data.

load(here::here("analysis", "many.rda"))


all_fc_vals <- log2(dplyr::bind_rows(lapply(many, function(x){dplyr::select(x, fold_change) }))$fold_change)
max_fc <- max(all_fc_vals)
min_fc <- min(all_fc_vals)

ul <- max(abs(c(max_fc, min_fc)))
ll <- ul * -1

library(pepdiff)

ht <- plot_heatmap(many, metric = "bootstrap_t", log = TRUE, sig = 1, only_sig_points = FALSE, all_points = TRUE, scale_min = ll, scale_max = ul)
## Joining, by = "comparison"
## Joining, by = "gene_peptide"
lgd <- ComplexHeatmap::Legend(direction = "horizontal",
  col_fun = circlize::colorRamp2(
                           seq(ll, ul, length.out = 11),
                           rev(RColorBrewer::brewer.pal(11, "RdBu"))
                         ),
  title = "Log 2 Fold Change")

ComplexHeatmap::draw(ht, padding=grid::unit(c(0.5,0.5,0.5,4), "in"))
ComplexHeatmap::draw(lgd, x = grid::unit(8, "in"), y = grid::unit(1, "in"))

Looks much better! Let’s try with a filter on the sig value. I’ll hand roll the code to cross check for problems.

##extract the fc columns from the list of dfs
fc_mats <- lapply(many, function(x){
  srted <- dplyr::mutate(x, 
                gene_peptide = gene_id,
                log_fc = log2(fold_change)
                ) %>%
    dplyr::arrange(gene_peptide) %>%  ##crucial to sort rows to be in same order for later steps
    dplyr::select(gene_peptide, log_fc) 
  rname <- srted$gene_peptide
  srted$gene_peptide <- NULL
  srted_m <- as.matrix(srted)
  rownames(srted_m) <- rname
  return(srted_m)
})

##join the columns into a matrix
fc_m <- do.call(cbind, fc_mats)
colnames(fc_m) <- names(fc_mats)

##extract the sig columns from the list of dfs
sig_mats <- lapply(many, function(x){
  srted <- dplyr::mutate(x, 
                gene_peptide = paste(gene_id, peptide, sep=" "),
                ) %>%
    dplyr::arrange(gene_peptide) %>% 
    dplyr::select(gene_peptide, bootstrap_t_p_val) 
  rname <- srted$gene_peptide
  srted$gene_peptide <- NULL
  srted_m <- as.matrix(srted)
  rownames(srted_m) <- rname
  return(srted_m)
})

##join the columns into a matrix
sig_m <- do.call(cbind, sig_mats)
colnames(sig_m) <- names(sig_mats)

## work out if any rows (peptides) have any sig values <= 0.05 )
sig_rows <- apply(sig_m, 1, function(x) {any(x <= 0.05 )})

## deal with NAs from uncomplete bootstraps
sig_rows[is.na(sig_rows)] <- FALSE

## select only fc valeues for passing peptides
sig_fc_mat <- fc_m[sig_rows,]


## get upper and lower limits
max_fc <- max(sig_fc_mat)
min_fc <- min(sig_fc_mat)

ul <- max(abs(c(max_fc, min_fc)))
ll <- ul * -1


## draw! 
col_order <- names(sig_fc_mat)
new_ht <- ComplexHeatmap::Heatmap(sig_fc_mat, column_order = col_order,
                        col = circlize::colorRamp2(
                          seq(ll,ul, length.out = 11),
                          rev(RColorBrewer::brewer.pal(11, "RdBu")),
                        ),
                        show_heatmap_legend = FALSE,
                        #width=grid::unit(4, "in")
                        column_names_gp = grid::gpar(fontsize=24, fontface="bold"),
                        row_names_gp = grid::gpar(fontsize=15, fontface="bold")
                        )


lgd <- ComplexHeatmap::Legend(direction = "horizontal",
  col_fun = circlize::colorRamp2(
                           seq(ll, ul, length.out = 11),
                           rev(RColorBrewer::brewer.pal(11, "RdBu"))
                         ),
  legend_width = grid::unit(3, "in"),
  labels_gp =  grid::gpar(fontsize = 24, fontface = "bold"),
  title_gp = grid::gpar(fontsize = 24, fontface = "bold"),
  title = "Log 2 Fold Change")


ComplexHeatmap::draw(new_ht,
                     row_dend_width=grid::unit(3,"in"),
                     height=grid::unit(36, "in"),
                     legend_grid_height = grid::unit(2, "in"),
                     padding= grid::unit(c(0,0,0,3), "in"))
ComplexHeatmap::draw(lgd, x = grid::unit(1.7, "in"), y = grid::unit(1, "in"))

Great, after a lot of fiddling with graphics parameters, it doesn’t look too bad. It can’t be easily plot on A4 though! Paul also wants to use the heatmap to infer clusters of co-regulated peptides. That is a bit of a sloppy approach, so I’ll build a cluster analysis.

Estimating cluster counts

Work out the number of clusters in the data set of peptides with at least on significant peptide.

library(factoextra)
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
fviz_nbclust(sig_fc_mat, kmeans, method="wss", k.max = 50)

The plot has a reasonable elbow, the line starts to show diminishing improvement at around 10 clusters, and that’s a manageable number, so lets divide into the ten best clusters.

Summarising the clusters

The following plot shows the centre value of each cluster.

kmeans_clust <- kmeans(sig_fc_mat, 10, nstart = 25, iter.max=1000)
str(kmeans_clust)
## List of 9
##  $ cluster     : Named int [1:182] 9 10 4 10 8 4 5 5 9 9 ...
##   ..- attr(*, "names")= chr [1:182] "gi|145609287|ref|XP_367574.2| (S4)" "gi|145610929|ref|XP_368444.2| MST7 (S318)" "gi|145610929|ref|XP_368444.2| MST7 (S358)" "gi|145610929|ref|XP_368444.2| MST7 (T332)" ...
##  $ centers     : num [1:10, 1:10] 7.475 3.459 0.95 2.457 0.262 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:10] "1" "2" "3" "4" ...
##   .. ..$ : chr [1:10] "Guy11_1-Guy11_0" "Guy11_1.5-Guy11_0" "Guy11_2-Guy11_0" "Guy11_4-Guy11_0" ...
##  $ totss       : num 7056
##  $ withinss    : num [1:10] 0 111 103 259 192 ...
##  $ tot.withinss: num 1256
##  $ betweenss   : num 5801
##  $ size        : int [1:10] 1 12 23 28 30 13 7 31 35 2
##  $ iter        : int 3
##  $ ifault      : int 0
##  - attr(*, "class")= chr "kmeans"
kmean_summ <- ComplexHeatmap::Heatmap(kmeans_clust$centers,column_order = col_order, row_order = 1:10,
                        col = circlize::colorRamp2(
                          seq(ll,ul, length.out = 11),
                          rev(RColorBrewer::brewer.pal(11, "RdBu")),
                        ),
                        show_heatmap_legend = FALSE,)
lgd <- ComplexHeatmap::Legend(direction = "horizontal",
  col_fun = circlize::colorRamp2(
                           seq(ll, ul, length.out = 11),
                           rev(RColorBrewer::brewer.pal(11, "RdBu"))
                         ),
  title = "Log 2 Fold Change")

ComplexHeatmap::draw(kmean_summ,padding= grid::unit(c(0,0,0,1.5), "in"))
ComplexHeatmap::draw(lgd, x = grid::unit(6, "in"), y = grid::unit(0.5, "in"))

Plot a heatmap of each cluster

Clusters are in numeric order. A common legend is output at the end.

make_hmap <- function(mat, km, col_order) {
  r <- list(vector(mode = "list", length = max(km$cluster)))
  for (i in 1:max(km$cluster)){
    rnames <- names(km$cluster[km$cluster == i])
    m <- mat[rnames,]
    if (length(rnames) == 1){
      m <- t(as.matrix(m))
      rownames(m) <- rnames
    }
    r[i] <- ComplexHeatmap::Heatmap(m,column_order = col_order, name = paste("Cluster", i),
                        col = circlize::colorRamp2(
                          seq(ll,ul, length.out = 11),
                          rev(RColorBrewer::brewer.pal(11, "RdBu")),
                        ),
                        height = length(rnames)*grid::unit(5, "mm"),
                        show_heatmap_legend = FALSE,)
    
  }
 return(r) 
}

hml <- make_hmap(sig_fc_mat, kmeans_clust, col_order)
## Warning in `[<-`(`*tmp*`, i, value = ComplexHeatmap::Heatmap(m, column_order =
## col_order, : implicit list embedding of S4 objects is deprecated

## Warning in `[<-`(`*tmp*`, i, value = ComplexHeatmap::Heatmap(m, column_order =
## col_order, : implicit list embedding of S4 objects is deprecated

## Warning in `[<-`(`*tmp*`, i, value = ComplexHeatmap::Heatmap(m, column_order =
## col_order, : implicit list embedding of S4 objects is deprecated

## Warning in `[<-`(`*tmp*`, i, value = ComplexHeatmap::Heatmap(m, column_order =
## col_order, : implicit list embedding of S4 objects is deprecated

## Warning in `[<-`(`*tmp*`, i, value = ComplexHeatmap::Heatmap(m, column_order =
## col_order, : implicit list embedding of S4 objects is deprecated

## Warning in `[<-`(`*tmp*`, i, value = ComplexHeatmap::Heatmap(m, column_order =
## col_order, : implicit list embedding of S4 objects is deprecated

## Warning in `[<-`(`*tmp*`, i, value = ComplexHeatmap::Heatmap(m, column_order =
## col_order, : implicit list embedding of S4 objects is deprecated

## Warning in `[<-`(`*tmp*`, i, value = ComplexHeatmap::Heatmap(m, column_order =
## col_order, : implicit list embedding of S4 objects is deprecated

## Warning in `[<-`(`*tmp*`, i, value = ComplexHeatmap::Heatmap(m, column_order =
## col_order, : implicit list embedding of S4 objects is deprecated

## Warning in `[<-`(`*tmp*`, i, value = ComplexHeatmap::Heatmap(m, column_order =
## col_order, : implicit list embedding of S4 objects is deprecated
for (i in 1:max(kmeans_clust$cluster)){
  ComplexHeatmap::draw(hml[[i]],padding= grid::unit(c(0,0,0,1.5), "in"))
}

ComplexHeatmap::draw(lgd, x = grid::unit(6, "in"), y = grid::unit(0.5, "in"))